{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "34ffe5a3-f269-4630-af59-05af2f656400",
   "metadata": {},
   "source": [
    "[![Static Badge](https://img.shields.io/badge/notebook-open_in_colab-blue?style=flat&logo=googlecolab&color=blue)](https://colab.research.google.com/drive/1QsYjUX3DgS8ccvDtxEgLeHHmbtPViIqV?usp=drive_link)\n",
    "\n",
    "### We recommend using the [Google Colab](https://colab.research.google.com/drive/1QsYjUX3DgS8ccvDtxEgLeHHmbtPViIqV?usp=drive_link) version of the notebook!"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f48bfa5b-05ca-493e-a892-0eff6dbcb4a6",
   "metadata": {},
   "source": [
    "# Convert UCR data to Orion format\n",
    "\n",
    "In this notebook we download the data and reformat it\n",
    "as Orion pipelines expect.\n",
    "\n",
    "### Download the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ac981ead",
   "metadata": {
    "id": "ac981ead"
   },
   "outputs": [],
   "source": [
    "# download dataset & unzip\n",
    "\n",
    "import io\n",
    "import os\n",
    "import urllib.request\n",
    "import zipfile\n",
    "\n",
    "DATA_URL = 'https://www.cs.ucr.edu/~eamonn/time_series_data_2018/UCR_TimeSeriesAnomalyDatasets2021.zip'\n",
    "\n",
    "# `import urllib` alone does not load the `urllib.request` submodule, so it is\n",
    "# imported explicitly above; the context manager closes the HTTP response once\n",
    "# the archive has been read into memory\n",
    "with urllib.request.urlopen(DATA_URL) as response:\n",
    "    bytes_io = io.BytesIO(response.read())\n",
    "\n",
    "# unpack into the current working directory\n",
    "with zipfile.ZipFile(bytes_io) as zf:\n",
    "    zf.extractall()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8c719486",
   "metadata": {
    "id": "8c719486"
   },
   "outputs": [],
   "source": [
    "DATA_PATH = os.path.join('AnomalyDatasets_2021',\n",
    "                         'UCR_TimeSeriesAnomalyDatasets2021',\n",
    "                         'FilesAreInHere',\n",
    "                         'UCR_Anomaly_FullData')\n",
    "\n",
    "SAVE_TO = 'UCR'\n",
    "os.makedirs(SAVE_TO, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "23207f6e",
   "metadata": {
    "id": "23207f6e"
   },
   "outputs": [],
   "source": [
    "import csv\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c5b24d9",
   "metadata": {
    "id": "3c5b24d9"
   },
   "source": [
    "#### Format\n",
    "\n",
    "012_UCR_Anomaly_tiltAPB1_100000_114283_114350.txt\n",
    "\n",
    "- `012` Dataset number\n",
    "- `tiltAPB1` Mnemonic name\n",
    "- `100000` Training size: points 1 to 100000 are training data\n",
    "- `114283` Begin anomaly\n",
    "- `114350` End anomaly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "bd411a2b",
   "metadata": {
    "id": "bd411a2b",
    "outputId": "c69079c0-b034-4a55-8492-3fb96492db47"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>timestamp</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1222819200</td>\n",
       "      <td>1990.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1222819500</td>\n",
       "      <td>1996.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1222819800</td>\n",
       "      <td>1958.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1222820100</td>\n",
       "      <td>1958.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1222820400</td>\n",
       "      <td>1923.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    timestamp   value\n",
       "0  1222819200  1990.0\n",
       "1  1222819500  1996.0\n",
       "2  1222819800  1958.0\n",
       "3  1222820100  1958.0\n",
       "4  1222820400  1923.0"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def build_df(data, start=0):\n",
    "    index = np.array(range(start, start + len(data)))\n",
    "    step = 300\n",
    "    initial_time = 1222819200\n",
    "    timestamp = index * step + initial_time\n",
    "\n",
    "    if len(data.shape) > 1 and data.shape[1] > 1:\n",
    "        print(\"MULTIVARIATE\")\n",
    "        df = pd.DataFrame(data)\n",
    "        df['timestamp'] = timestamp\n",
    "    else:\n",
    "        df = pd.DataFrame({'timestamp': timestamp, 'value': data.reshape(-1, )})\n",
    "\n",
    "    df['timestamp'] = df['timestamp'].astype('int64')\n",
    "    return df\n",
    "\n",
    "# preview one converted signal; the path is joined the same way as in the\n",
    "# conversion loop so it does not depend on a hard-coded separator\n",
    "sample_path = os.path.join(DATA_PATH, '204_UCR_Anomaly_CHARISfive_12412_15000_15070.txt')\n",
    "df = build_df(np.loadtxt(sample_path))\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d17be27d",
   "metadata": {
    "id": "d17be27d",
    "outputId": "1cd182c7-452d-485b-cb96-338452d315f6"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 250/250 [00:19<00:00, 12.84it/s]\n"
     ]
    }
   ],
   "source": [
    "# os.listdir returns entries in arbitrary order, so the listing is sorted to\n",
    "# make the processing order (and the saved labels) reproducible; anything that\n",
    "# is not a .txt file is skipped so stray files cannot break the name parsing\n",
    "files = sorted(name for name in os.listdir(DATA_PATH) if name.endswith('.txt'))\n",
    "file_names, train_sizes, intervals = [], [], []\n",
    "\n",
    "for file in tqdm(files):\n",
    "    # <number>_UCR_Anomaly_<name>_<train size>_<begin>_<end>.txt\n",
    "    file_num_str, _, _, file_name, train_size_str, begin_str, end_str = file.split('_')\n",
    "\n",
    "    train_size, begin_anomaly = int(train_size_str), int(begin_str)\n",
    "    end_anomaly = int(end_str.split('.')[0])  # drop the file extension\n",
    "    file_name = file_num_str + '-' + file_name\n",
    "\n",
    "    # translate the anomaly positions into timestamps\n",
    "    df = build_df(np.loadtxt(os.path.join(DATA_PATH, file)))\n",
    "    begin_anomaly = int(df.timestamp.iloc[begin_anomaly])\n",
    "    end_anomaly = int(df.timestamp.iloc[end_anomaly])\n",
    "\n",
    "    # train - test split\n",
    "    train_df = df.iloc[:train_size]\n",
    "    test_df = df.iloc[train_size:]\n",
    "\n",
    "    # save both splits and the full signal\n",
    "    train_df.to_csv(os.path.join(SAVE_TO, '{}-train.csv'.format(file_name)), index=False)\n",
    "    test_df.to_csv(os.path.join(SAVE_TO, '{}-test.csv'.format(file_name)), index=False)\n",
    "    df.to_csv(os.path.join(SAVE_TO, '{}.csv'.format(file_name)), index=False)\n",
    "\n",
    "    file_names.append(file_name)\n",
    "    train_sizes.append(train_size)\n",
    "    intervals.append([begin_anomaly, end_anomaly])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3e639635",
   "metadata": {
    "id": "3e639635"
   },
   "outputs": [],
   "source": [
    "# one [signal, [[begin, end]]] record per converted file\n",
    "rows = [\n",
    "    [signal, [interval]]\n",
    "    for signal, interval in zip(file_names, intervals)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "66d7cd5f",
   "metadata": {
    "id": "66d7cd5f",
    "outputId": "9d910330-194a-47df-a59a-24a0ac0c48ec"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['183-qtdbSel100MLII', [[1226839200, 1226959200]]],\n",
       " ['194-sddb49', [[1243204200, 1243279200]]],\n",
       " ['069-DISTORTEDinsectEPG5', [[1225369200, 1225369500]]],\n",
       " ['023-DISTORTEDGP711MarkerLFM5z5', [[1225402800, 1225434000]]],\n",
       " ['212-Italianpowerdemand', [[1231663200, 1231670400]]],\n",
       " ['180-ltstdbs30791ES', [[1238599200, 1238659200]]],\n",
       " ['058-DISTORTEDapneaecg', [[1226491200, 1226511600]]],\n",
       " ['130-GP711MarkerLFM5z4', [[1224777300, 1224812700]]],\n",
       " ['079-DISTORTEDresperation2', [[1273294200, 1273294200]]],\n",
       " ['224-mit14046longtermecg', [[1280179200, 1280329200]]],\n",
       " ['044-DISTORTEDPowerDemand1', [[1228364700, 1228465500]]],\n",
       " ['233-mit14157longtermecg', [[1230169200, 1230169500]]],\n",
       " ['042-DISTORTEDLab2Cmac011215EPG6', [[1226476200, 1226545200]]],\n",
       " ['114-CIMIS44AirTemperature2', [[1224530100, 1224537300]]],\n",
       " ['204-CHARISfive', [[1227319200, 1227340200]]],\n",
       " ['241-taichidbS0715Master', [[1474039200, 1474549200]]],\n",
       " ['028-DISTORTEDInternalBleeding17', [[1223778600, 1223811900]]],\n",
       " ['026-DISTORTEDInternalBleeding15', [[1224524400, 1224575400]]],\n",
       " ['092-DISTORTEDtiltAPB4', [[1243217700, 1243218000]]],\n",
       " ['214-STAFFIIIDatabase', [[1260535200, 1260730200]]],\n",
       " ['070-DISTORTEDltstdbs30791AI', [[1238599200, 1238659200]]],\n",
       " ['006-DISTORTEDCIMIS44AirTemperature2', [[1224530100, 1224537300]]],\n",
       " ['036-DISTORTEDInternalBleeding9', [[1224798900, 1224823500]]],\n",
       " ['198-tiltAPB2', [[1260066900, 1260314700]]],\n",
       " ['149-Lab2Cmac011215EPG5', [[1228036200, 1228075200]]],\n",
       " ['032-DISTORTEDInternalBleeding4', [[1224221700, 1224329100]]],\n",
       " ['099-NOISEInternalBleeding6', [[1223861400, 1223907900]]],\n",
       " ['137-InternalBleeding18', [[1224164700, 1224195300]]],\n",
       " ['216-STAFFIIIDatabase', [[1271035200, 1271230200]]],\n",
       " ['057-DISTORTEDapneaecg4', [[1227619200, 1227649200]]],\n",
       " ['087-DISTORTEDsel840mECG1', [[1238230200, 1238341200]]],\n",
       " ['103-NOISETkeepThirdMARS', [[1224232500, 1224261900]]],\n",
       " ['223-mit14046longtermecg', [[1262179200, 1262329200]]],\n",
       " ['179-ltstdbs30791AS', [[1238599200, 1238659200]]],\n",
       " ['178-ltstdbs30791AI', [[1238599200, 1238659200]]],\n",
       " ['189-resperation3', [[1270294200, 1270294500]]],\n",
       " ['225-mit14046longtermecg', [[1265719200, 1265809200]]],\n",
       " ['190-resperation4', [[1261348200, 1261348500]]],\n",
       " ['229-mit14134longtermecg', [[1240207200, 1240210200]]],\n",
       " ['186-resperation1', [[1255897200, 1255942800]]],\n",
       " ['064-DISTORTEDgaitHunt3', [[1234339200, 1234579200]]],\n",
       " ['219-STAFFIIIDatabase', [[1242208800, 1242274800]]],\n",
       " ['172-gaitHunt3', [[1234339200, 1234579200]]],\n",
       " ['144-InternalBleeding9', [[1224798900, 1224823500]]],\n",
       " ['027-DISTORTEDInternalBleeding16', [[1224075300, 1224078900]]],\n",
       " ['033-DISTORTEDInternalBleeding5', [[1224679200, 1224730200]]],\n",
       " ['005-DISTORTEDCIMIS44AirTemperature1', [[1224436500, 1224436800]]],\n",
       " ['003-DISTORTED3sddb40', [[1236799200, 1236889200]]],\n",
       " ['250-weallwalk', [[1225006200, 1225008000]]],\n",
       " ['039-DISTORTEDLab2Cmac011215EPG3', [[1227736200, 1227745200]]],\n",
       " ['111-3sddb40', [[1236799200, 1236889200]]],\n",
       " ['150-Lab2Cmac011215EPG6', [[1226476200, 1226545200]]],\n",
       " ['002-DISTORTED2sddb40', [[1239799200, 1239889200]]],\n",
       " ['221-STAFFIIIDatabase', [[1271908800, 1272274800]]],\n",
       " ['077-DISTORTEDresperation11', [[1256059200, 1256059500]]],\n",
       " ['227-mit14134longtermecg', [[1231519200, 1231549200]]],\n",
       " ['222-mit14046longtermecg', [[1250179200, 1250329200]]],\n",
       " ['066-DISTORTEDinsectEPG2', [[1225219200, 1225226700]]],\n",
       " ['013-DISTORTEDECG3', [[1227619200, 1227649200]]],\n",
       " ['051-DISTORTEDTkeepSecondMARS', [[1225618200, 1225621200]]],\n",
       " ['163-apneaecg2', [[1229104200, 1229149200]]],\n",
       " ['141-InternalBleeding5', [[1224679200, 1224730200]]],\n",
       " ['041-DISTORTEDLab2Cmac011215EPG5', [[1228036200, 1228075200]]],\n",
       " ['151-MesoplodonDensirostris', [[1228603200, 1228651200]]],\n",
       " ['213-STAFFIIIDatabase', [[1260895200, 1261030200]]],\n",
       " ['016-DISTORTEDECG4', [[1227889200, 1227949200]]],\n",
       " ['074-DISTORTEDqtdbSel1005V', [[1226539200, 1226659200]]],\n",
       " ['045-DISTORTEDPowerDemand2', [[1229826300, 1229934300]]],\n",
       " ['247-tilt12755mtable', [[1259389200, 1259413200]]],\n",
       " ['147-Lab2Cmac011215EPG3', [[1227736200, 1227745200]]],\n",
       " ['104-NOISEapneaecg4', [[1227619200, 1227649200]]],\n",
       " ['062-DISTORTEDgaitHunt1', [[1232740200, 1232773200]]],\n",
       " ['108-NOISEresperation2', [[1273294200, 1273294200]]],\n",
       " ['125-ECG4', [[1227919200, 1227949200]]],\n",
       " ['030-DISTORTEDInternalBleeding19', [[1224075300, 1224078300]]],\n",
       " ['106-NOISEgaitHunt2', [[1232179200, 1232374200]]],\n",
       " ['146-Lab2Cmac011215EPG2', [[1231177800, 1231198800]]],\n",
       " ['084-DISTORTEDs20101mML2', [[1233551400, 1233581400]]],\n",
       " ['022-DISTORTEDGP711MarkerLFM5z4', [[1224777300, 1224812700]]],\n",
       " ['060-DISTORTEDgait2', [[1236769200, 1236859200]]],\n",
       " ['202-CHARISfive', [[1226118600, 1226127600]]],\n",
       " ['210-Italianpowerdemand', [[1245289200, 1245318000]]],\n",
       " ['242-tilt12744mtable', [[1254208200, 1254286200]]],\n",
       " ['126-ECG4', [[1227919200, 1227949200]]],\n",
       " ['055-DISTORTEDapneaecg2', [[1229104200, 1229149200]]],\n",
       " ['165-apneaecg4', [[1227619200, 1227649200]]],\n",
       " ['059-DISTORTEDgait1', [[1234369200, 1234459200]]],\n",
       " ['220-STAFFIIIDatabase', [[1298035200, 1298230200]]],\n",
       " ['199-tiltAPB3', [[1257019200, 1257130200]]],\n",
       " ['048-DISTORTEDTkeepFifthMARS', [[1224615600, 1224644700]]],\n",
       " ['193-s20101m', [[1233551400, 1233581400]]],\n",
       " ['232-mit14134longtermecg', [[1240078200, 1240156200]]],\n",
       " ['152-PowerDemand1', [[1228364700, 1228465500]]],\n",
       " ['164-apneaecg3', [[1226152500, 1226182500]]],\n",
       " ['020-DISTORTEDGP711MarkerLFM5z2', [[1224971700, 1225035600]]],\n",
       " ['244-tilt12754table', [[1254208200, 1254286200]]],\n",
       " ['173-insectEPG1', [[1224919200, 1224928200]]],\n",
       " ['124-ECG4', [[1227889200, 1227949200]]],\n",
       " ['208-CHARISten', [[1231197900, 1231215900]]],\n",
       " ['158-TkeepForthMARS', [[1224615600, 1224644700]]],\n",
       " ['053-DISTORTEDWalkingAceleration1', [[1223648400, 1223717700]]],\n",
       " ['031-DISTORTEDInternalBleeding20', [[1224546900, 1224594900]]],\n",
       " ['015-DISTORTEDECG4', [[1227859200, 1227949200]]],\n",
       " ['056-DISTORTEDapneaecg3', [[1226152500, 1226182500]]],\n",
       " ['184-resperation10', [[1262029200, 1262383200]]],\n",
       " ['157-TkeepFirstMARS', [[1224428700, 1224433200]]],\n",
       " ['035-DISTORTEDInternalBleeding8', [[1224578700, 1224611400]]],\n",
       " ['226-mit14046longtermecg', [[1259719200, 1259809200]]],\n",
       " ['052-DISTORTEDTkeepThirdMARS', [[1224232500, 1224261900]]],\n",
       " ['171-gaitHunt2', [[1232179200, 1232374200]]],\n",
       " ['160-TkeepThirdMARS', [[1224232500, 1224261900]]],\n",
       " ['231-mit14134longtermecg', [[1237078200, 1237156200]]],\n",
       " ['001-DISTORTED1sddb40', [[1238419200, 1238605200]]],\n",
       " ['113-CIMIS44AirTemperature1', [[1224436500, 1224436800]]],\n",
       " ['040-DISTORTEDLab2Cmac011215EPG4', [[1228036200, 1228075200]]],\n",
       " ['093-NOISE1sddb40', [[1238419200, 1238605200]]],\n",
       " ['228-mit14134longtermecg', [[1237168200, 1237174200]]],\n",
       " ['102-NOISEMesoplodonDensirostris', [[1228603200, 1228651200]]],\n",
       " ['096-NOISEECG4', [[1227889200, 1227949200]]],\n",
       " ['136-InternalBleeding17', [[1223778600, 1223811900]]],\n",
       " ['082-DISTORTEDresperation4', [[1261348200, 1261348500]]],\n",
       " ['097-NOISEGP711MarkerLFM5z3', [[1224603600, 1224617100]]],\n",
       " ['012-DISTORTEDECG2', [[1227619200, 1227649200]]],\n",
       " ['246-tilt12755mtable', [[1304059200, 1304140200]]],\n",
       " ['107-NOISEinsectEPG3', [[1224919200, 1224934200]]],\n",
       " ['120-ECG2', [[1227619200, 1227649200]]],\n",
       " ['134-InternalBleeding15', [[1224524400, 1224575400]]],\n",
       " ['004-DISTORTEDBIDMC1', [[1224439200, 1224499200]]],\n",
       " ['207-CHARISten', [[1230897900, 1230915900]]],\n",
       " ['073-DISTORTEDpark3m', [[1244464200, 1244567700]]],\n",
       " ['008-DISTORTEDCIMIS44AirTemperature4', [[1224483900, 1224498300]]],\n",
       " ['105-NOISEgait3', [[1240789200, 1240969200]]],\n",
       " ['068-DISTORTEDinsectEPG4', [[1224771600, 1224786600]]],\n",
       " ['139-InternalBleeding20', [[1224546900, 1224594900]]],\n",
       " ['017-DISTORTEDECG4', [[1227919200, 1227949200]]],\n",
       " ['196-sel840mECG2', [[1237630200, 1237741200]]],\n",
       " ['086-DISTORTEDsddb49', [[1243204200, 1243279200]]],\n",
       " ['085-DISTORTEDs20101m', [[1233551400, 1233581400]]],\n",
       " ['217-STAFFIIIDatabase', [[1268035200, 1268230200]]],\n",
       " ['211-Italianpowerdemand', [[1234591200, 1234620000]]],\n",
       " ['181-park3m', [[1244464200, 1244567700]]],\n",
       " ['129-GP711MarkerLFM5z3', [[1224603600, 1224617100]]],\n",
       " ['245-tilt12754table', [[1304059200, 1304140200]]],\n",
       " ['234-mit14157longtermecg', [[1230199200, 1230199500]]],\n",
       " ['161-WalkingAceleration1', [[1223648400, 1223717700]]],\n",
       " ['067-DISTORTEDinsectEPG3', [[1224919200, 1224934200]]],\n",
       " ['170-gaitHunt1', [[1232740200, 1232773200]]],\n",
       " ['118-CIMIS44AirTemperature6', [[1224621000, 1224635400]]],\n",
       " ['143-InternalBleeding8', [[1224578700, 1224611400]]],\n",
       " ['155-PowerDemand4', [[1230020700, 1230042300]]],\n",
       " ['162-WalkingAceleration5', [[1224595200, 1224612900]]],\n",
       " ['191-resperation9', [[1265842500, 1265872500]]],\n",
       " ['034-DISTORTEDInternalBleeding6', [[1223861400, 1223907900]]],\n",
       " ['110-2sddb40', [[1239799200, 1239889200]]],\n",
       " ['009-DISTORTEDCIMIS44AirTemperature5', [[1224274800, 1224289200]]],\n",
       " ['133-InternalBleeding14', [[1224501300, 1224509400]]],\n",
       " ['088-DISTORTEDsel840mECG2', [[1237630200, 1237741200]]],\n",
       " ['203-CHARISfive', [[1226117700, 1226127600]]],\n",
       " ['010-DISTORTEDCIMIS44AirTemperature6', [[1224621000, 1224635400]]],\n",
       " ['089-DISTORTEDtiltAPB1', [[1257104100, 1257124200]]],\n",
       " ['037-DISTORTEDLab2Cmac011215EPG1', [[1227982200, 1227997200]]],\n",
       " ['249-weallwalk', [[1225304700, 1225313700]]],\n",
       " ['081-DISTORTEDresperation3', [[1270294200, 1270294500]]],\n",
       " ['128-GP711MarkerLFM5z2', [[1224971700, 1225035600]]],\n",
       " ['201-CHARISfive', [[1227919500, 1227924000]]],\n",
       " ['071-DISTORTEDltstdbs30791AS', [[1238599200, 1238659200]]],\n",
       " ['083-DISTORTEDresperation9', [[1265842500, 1265872500]]],\n",
       " ['166-apneaecg', [[1226491200, 1226511600]]],\n",
       " ['235-mit14157longtermecg', [[1245454200, 1245454500]]],\n",
       " ['230-mit14134longtermecg', [[1228672200, 1228702200]]],\n",
       " ['091-DISTORTEDtiltAPB3', [[1257019200, 1257130200]]],\n",
       " ['148-Lab2Cmac011215EPG4', [[1228036200, 1228075200]]],\n",
       " ['049-DISTORTEDTkeepFirstMARS', [[1224428700, 1224433200]]],\n",
       " ['175-insectEPG3', [[1224919200, 1224934200]]],\n",
       " ['038-DISTORTEDLab2Cmac011215EPG2', [[1231177800, 1231198800]]],\n",
       " ['236-mit14157longtermecg', [[1236724200, 1236736200]]],\n",
       " ['167-gait1', [[1234369200, 1234459200]]],\n",
       " ['243-tilt12744mtable', [[1283825700, 1283839200]]],\n",
       " ['187-resperation2', [[1273294200, 1273294200]]],\n",
       " ['043-DISTORTEDMesoplodonDensirostris', [[1228603200, 1228651200]]],\n",
       " ['174-insectEPG2', [[1225219200, 1225226700]]],\n",
       " ['182-qtdbSel1005V', [[1226539200, 1226659200]]],\n",
       " ['153-PowerDemand2', [[1229826300, 1229934300]]],\n",
       " ['014-DISTORTEDECG3', [[1227919200, 1227949200]]],\n",
       " ['095-NOISECIMIS44AirTemperature4', [[1224483900, 1224498300]]],\n",
       " ['237-mit14157longtermecg', [[1249687200, 1249930200]]],\n",
       " ['238-mit14157longtermecg', [[1244599200, 1244653200]]],\n",
       " ['078-DISTORTEDresperation1', [[1255897200, 1255942800]]],\n",
       " ['156-TkeepFifthMARS', [[1224615600, 1224644700]]],\n",
       " ['116-CIMIS44AirTemperature4', [[1224483900, 1224498300]]],\n",
       " ['011-DISTORTEDECG1', [[1226359200, 1226449200]]],\n",
       " ['007-DISTORTEDCIMIS44AirTemperature3', [[1224775200, 1224782400]]],\n",
       " ['169-gait3', [[1240789200, 1240969200]]],\n",
       " ['218-STAFFIIIDatabase', [[1286035200, 1286230200]]],\n",
       " ['054-DISTORTEDWalkingAceleration5', [[1224595200, 1224612900]]],\n",
       " ['209-Fantasia', [[1230910200, 1231000200]]],\n",
       " ['046-DISTORTEDPowerDemand3', [[1229840700, 1229862300]]],\n",
       " ['025-DISTORTEDInternalBleeding14', [[1224501300, 1224509400]]],\n",
       " ['094-NOISEBIDMC1', [[1224439200, 1224499200]]],\n",
       " ['142-InternalBleeding6', [[1223861400, 1223907900]]],\n",
       " ['159-TkeepSecondMARS', [[1225618200, 1225621200]]],\n",
       " ['206-CHARISten', [[1231543200, 1231561200]]],\n",
       " ['138-InternalBleeding19', [[1224075300, 1224078300]]],\n",
       " ['123-ECG4', [[1227859200, 1227949200]]],\n",
       " ['135-InternalBleeding16', [[1224075300, 1224078900]]],\n",
       " ['192-s20101mML2', [[1233551400, 1233581400]]],\n",
       " ['185-resperation11', [[1256059200, 1256059500]]],\n",
       " ['115-CIMIS44AirTemperature3', [[1224775200, 1224782400]]],\n",
       " ['140-InternalBleeding4', [[1224221700, 1224329100]]],\n",
       " ['101-NOISELab2Cmac011215EPG4', [[1228036200, 1228075200]]],\n",
       " ['080-DISTORTEDresperation2', [[1273294200, 1273294500]]],\n",
       " ['119-ECG1', [[1226359200, 1226449200]]],\n",
       " ['200-tiltAPB4', [[1243217700, 1243218000]]],\n",
       " ['090-DISTORTEDtiltAPB2', [[1260066900, 1260314700]]],\n",
       " ['177-insectEPG5', [[1225369200, 1225369500]]],\n",
       " ['205-CHARISfive', [[1231517700, 1231544700]]],\n",
       " ['061-DISTORTEDgait3', [[1240789200, 1240969200]]],\n",
       " ['047-DISTORTEDPowerDemand4', [[1230020700, 1230042300]]],\n",
       " ['117-CIMIS44AirTemperature5', [[1224274800, 1224289200]]],\n",
       " ['145-Lab2Cmac011215EPG1', [[1227982200, 1227997200]]],\n",
       " ['019-DISTORTEDGP711MarkerLFM5z1', [[1224669600, 1224682800]]],\n",
       " ['131-GP711MarkerLFM5z5', [[1225402800, 1225434000]]],\n",
       " ['127-GP711MarkerLFM5z1', [[1224669600, 1224682800]]],\n",
       " ['021-DISTORTEDGP711MarkerLFM5z3', [[1224603600, 1224617100]]],\n",
       " ['168-gait2', [[1236769200, 1236859200]]],\n",
       " ['065-DISTORTEDinsectEPG1', [[1224919200, 1224928200]]],\n",
       " ['154-PowerDemand3', [[1229840700, 1229862300]]],\n",
       " ['072-DISTORTEDltstdbs30791ES', [[1238599200, 1238659200]]],\n",
       " ['121-ECG3', [[1227619200, 1227649200]]],\n",
       " ['076-DISTORTEDresperation10', [[1262029200, 1262383200]]],\n",
       " ['029-DISTORTEDInternalBleeding18', [[1224164700, 1224195300]]],\n",
       " ['248-weallwalk', [[1224229800, 1224231300]]],\n",
       " ['239-taichidbS0715Master', [[1400854200, 1400873400]]],\n",
       " ['024-DISTORTEDInternalBleeding10', [[1224177000, 1224186000]]],\n",
       " ['195-sel840mECG1', [[1238230200, 1238341200]]],\n",
       " ['188-resperation2', [[1273294200, 1273294500]]],\n",
       " ['112-BIDMC1', [[1224439200, 1224499200]]],\n",
       " ['122-ECG3', [[1227919200, 1227949200]]],\n",
       " ['100-NOISELab2Cmac011215EPG1', [[1227982200, 1227997200]]],\n",
       " ['215-STAFFIIIDatabase', [[1254835200, 1255030200]]],\n",
       " ['240-taichidbS0715Master', [[1488049200, 1488079200]]],\n",
       " ['018-DISTORTEDECG4', [[1227919200, 1227949200]]],\n",
       " ['197-tiltAPB1', [[1257104100, 1257124200]]],\n",
       " ['050-DISTORTEDTkeepForthMARS', [[1224615600, 1224644700]]],\n",
       " ['063-DISTORTEDgaitHunt2', [[1232179200, 1232374200]]],\n",
       " ['075-DISTORTEDqtdbSel100MLII', [[1226839200, 1226959200]]],\n",
       " ['132-InternalBleeding10', [[1224177000, 1224186000]]],\n",
       " ['109-1sddb40', [[1238419200, 1238605200]]],\n",
       " ['176-insectEPG4', [[1224771600, 1224786600]]],\n",
       " ['098-NOISEInternalBleeding16', [[1224075300, 1224078900]]]]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# preview the first few records instead of dumping the whole list\n",
    "rows[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "65e79159",
   "metadata": {
    "id": "65e79159"
   },
   "outputs": [],
   "source": [
    "# write the label records to disk, one csv line per signal\n",
    "\n",
    "anomalies_path = SAVE_TO + '/anomalies.csv'\n",
    "with open(anomalies_path, 'w', newline='') as file:\n",
    "    csv.writer(file).writerows(rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "eaa00b2f",
   "metadata": {
    "id": "eaa00b2f"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>signal</th>\n",
       "      <th>events</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>183-qtdbSel100MLII</td>\n",
       "      <td>[[1226839200, 1226959200]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>194-sddb49</td>\n",
       "      <td>[[1243204200, 1243279200]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>069-DISTORTEDinsectEPG5</td>\n",
       "      <td>[[1225369200, 1225369500]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>023-DISTORTEDGP711MarkerLFM5z5</td>\n",
       "      <td>[[1225402800, 1225434000]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>212-Italianpowerdemand</td>\n",
       "      <td>[[1231663200, 1231670400]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>245</th>\n",
       "      <td>075-DISTORTEDqtdbSel100MLII</td>\n",
       "      <td>[[1226839200, 1226959200]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>246</th>\n",
       "      <td>132-InternalBleeding10</td>\n",
       "      <td>[[1224177000, 1224186000]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>247</th>\n",
       "      <td>109-1sddb40</td>\n",
       "      <td>[[1238419200, 1238605200]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>248</th>\n",
       "      <td>176-insectEPG4</td>\n",
       "      <td>[[1224771600, 1224786600]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>249</th>\n",
       "      <td>098-NOISEInternalBleeding16</td>\n",
       "      <td>[[1224075300, 1224078900]]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>250 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                             signal                      events\n",
       "0                183-qtdbSel100MLII  [[1226839200, 1226959200]]\n",
       "1                        194-sddb49  [[1243204200, 1243279200]]\n",
       "2           069-DISTORTEDinsectEPG5  [[1225369200, 1225369500]]\n",
       "3    023-DISTORTEDGP711MarkerLFM5z5  [[1225402800, 1225434000]]\n",
       "4            212-Italianpowerdemand  [[1231663200, 1231670400]]\n",
       "..                              ...                         ...\n",
       "245     075-DISTORTEDqtdbSel100MLII  [[1226839200, 1226959200]]\n",
       "246          132-InternalBleeding10  [[1224177000, 1224186000]]\n",
       "247                     109-1sddb40  [[1238419200, 1238605200]]\n",
       "248                  176-insectEPG4  [[1224771600, 1224786600]]\n",
       "249     098-NOISEInternalBleeding16  [[1224075300, 1224078900]]\n",
       "\n",
       "[250 rows x 2 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# read the saved labels back to inspect what was written\n",
    "new_labels = pd.read_csv(\n",
    "    SAVE_TO + '/anomalies.csv',\n",
    "    header=None,\n",
    "    names=['signal', 'events'],\n",
    ")\n",
    "new_labels"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
